### Readme
#Please make sure the following package is installed.
#install.packages("sandwich")
#install.packages("lmtest")
#install.packages("mvtnorm")
#install.packages("numDeriv")
#install.packages("numDeriv")
#install.packages("Zelig")
#install.packages("foreign")

#If you are using R on Apple OS or linux, please search for "savePlot" and comment out all "savePlot" line.

#If you mean any prolem running this R code, please copy the whole console output in R Console, 
# first select Menu "Edit->Clear console", then run my R code, 
# then select Menu "Edit->Select all", and Copy the content and send to suyanfang@gmail.com

###Thanks!

install.packages("sandwich")
install.packages("lmtest")
install.packages("mvtnorm")
install.packages("numDeriv")
library(numDeriv)
library(sandwich)
library(lmtest)
#from 
#http://jjharden.web.unc.edu/archives/344
robust.se <- function(model, cluster){
	M <- length(unique(cluster))
	N <- length(cluster)           
	K <- model$rank
	dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
	uj  <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum));
	rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
	rcse.se <- coeftest(model, rcse.cov)
	#return (list(rcse.cov, rcse.se))
	return (rcse.se)
}

library(Zelig)
library(foreign)
HIKAP.survey.ori <- read.dta("HIKAP_survey.dta")
head(HIKAP.survey.ori)

#9950 and 9951 row are all NA except realdivision column. should we Remove them?
#HIKAP.survey.ori <- HIKAP.survey.ori[1:9949,]
#note we will have two unknown sex if 9950 and 9951 not removed
sex <- as.vector(as.matrix(HIKAP.survey.ori["sex"]))
#sex[sex==""] <- "U"
form <- as.vector(as.matrix(HIKAP.survey.ori["form"]))
#form[is.na(form)] <- 12345000
age <- as.vector(as.matrix(HIKAP.survey.ori["age"]))
SDtreat <- as.vector(as.matrix(HIKAP.survey.ori["SDtreat"]))
#SDtreat[is.na(SDtreat)] <- 12345000
realdivision <- as.vector(as.matrix(HIKAP.survey.ori["realdivision"]))
realdivision[realdivision==""] <- NA
eversex <- as.vector(as.matrix(HIKAP.survey.ori["eversex"]))
evercondom <- as.vector(as.matrix(HIKAP.survey.ori["evercondom"]))
condomlast <- as.vector(as.matrix(HIKAP.survey.ori["condomlast"]))
secschoolid <- as.vector(as.matrix(HIKAP.survey.ori["secschoolid"]))

regpartner <- as.vector(as.matrix(HIKAP.survey.ori["regpartner"]))
agepartner <- as.vector(as.matrix(HIKAP.survey.ori["agepartner"]))

condomregpartner <- as.vector(as.matrix(HIKAP.survey.ori["condomregpartner"]))

agepartner <- as.vector(as.matrix(HIKAP.survey.ori["agepartner"]))

giftpartner <- as.vector(as.matrix(HIKAP.survey.ori["giftpartner"]))

unsampled <- as.vector(as.matrix(HIKAP.survey.ori["unsampled"]))
boysonlyschool <- as.vector(as.matrix(HIKAP.survey.ori["boysonlyschool"]))
girlsonlyschool <- as.vector(as.matrix(HIKAP.survey.ori["girlsonlyschool"]))
girlsperboy <- as.vector(as.matrix(HIKAP.survey.ori["girlsperboy"]))
missinggirlsperboy <- as.vector(as.matrix(HIKAP.survey.ori["missinggirlsperboy"]))
sampleSD <- as.vector(as.matrix(HIKAP.survey.ori["sampleSD"]))
HIVtreat <- as.vector(as.matrix(HIKAP.survey.ori["HIVtreat"]))
HIKAP.survey.factor <- as.factor(realdivision)

#########################################################################################################
###susu add 042511###
#gen RRWithoutTT=1 if sampleSD==1 & HIVtreat==0
RRWithoutTT <- numeric(length(sex))
RRWithoutTT[] <- NA
RRWithoutTT[sampleSD==1 & HIVtreat==0] <- 1
# replace RRWithoutTT=0 if sampleSD==0 & HIVtreat==0
RRWithoutTT[sampleSD==0 & HIVtreat==0] <- 0

#gen RRconTT=1 if sampleSD==1 & HIVtreat==1
RRconTT <- numeric(length(sex))
RRconTT[] <- NA
RRconTT[sampleSD==1 & HIVtreat==1] <- 1
# replace RRconTT=0 if sampleSD==0 & HIVtreat==1
RRconTT[sampleSD==0 & HIVtreat==1] <- 0

#gen RRandTT=1 if sampleSD==1 & HIVtreat==1
RRandTT <- numeric(length(sex))
RRandTT[] <- NA
RRandTT[sampleSD==1 & HIVtreat==1] <- 1
# replace RRandTT=0 if sampleSD==0 & HIVtreat==0
RRandTT[sampleSD==0 & HIVtreat==0] <- 0

#gen TTWithoutRR=1 if sampleSD==0 & HIVtreat==1
TTWithoutRR <- numeric(length(sex))
TTWithoutRR[] <- NA
TTWithoutRR[sampleSD==0 & HIVtreat==1] <- 1
# replace TTWithoutRR=0 if sampleSD==0 & HIVtreat==0
TTWithoutRR[sampleSD==0 & HIVtreat==0] <- 0

rrtt.subvector <- cbind(RRWithoutTT,RRconTT,RRandTT, TTWithoutRR)
#data clean up

#gen girl=0 if sex=="M"
girl <- numeric(length(sex))
girl[is.na(sex)] <- NA
girl[sex=="M"] <- 0
#	replace girl=1 if sex=="F"
girl[sex=="F"] <- 1
#gen agemissing=0
agemissing <- numeric(length(sex))
#	replace agemissing=1 if age==.
agemissing[is.na(age)] <- 1
#	sum age if girl==0
boy.mean.age <- mean(age[girl==0 & !is.na(age)])
#	replace age=r(mean) if agemissing==1&girl==0 
age[agemissing==1&girl==0] <- boy.mean.age
#	sum age if girl==1
girl.mean.age <- mean(age[girl==1 & !is.na(age)])
#	replace age=r(mean) if agemissing==1&girl==1 
age[agemissing==1&girl==1] <- girl.mean.age

#gen cluster=secschoolid*form
cluster <- secschoolid * form
#gen cohort=0 if form==2
cohort <- numeric(length(form))
cohort[is.na(form)] <- NA
#	replace cohort=1 if form==1
cohort[form==1] <- 1

	
#gen multiplepartner=0 if eversex!=.
multiplepartner <- numeric(length(eversex))
multiplepartner[is.na(eversex)] <- NA
#	replace multiplepartner=1 if eversex==1
multiplepartner[eversex == 1] <- 1

#gen everplayedsex=0 if eversex!=.
everplayedsex <- numeric(length(eversex))
everplayedsex[is.na(eversex)] <- NA
#	replace everplayedsex=1 if eversex<3
everplayedsex[eversex < 3] <- 1

#gen everusedcondom=evercondom
everusedcondom <- evercondom
#	recode everusedcondom (2=0) (3=.)
everusedcondom[everusedcondom==2] <- 0
everusedcondom[everusedcondom==3] <- NA

#gen condomlastsex=condomlast
condomlastsex <- condomlast
#	recode condomlastsex (2=0) (3=.)
condomlastsex[condomlastsex==2] <- 0
condomlastsex[condomlastsex==3] <- NA

#recode regpartner (2=0)
regpartner[regpartner==2] <- 0
#	replace regpartner=1 if agepartner!=.
regpartner[!is.na(agepartner)] <- 1

#recode condomregpartner (2=0) (3=.)
condomregpartner[condomregpartner==2] <- 0
condomregpartner[condomregpartner==3] <- NA
#	replace condomregpartner=. if regpartner!=1
condomregpartner[regpartner!=1 | is.na(regpartner)] <- NA

#recode giftpartner (2=0) (3=.)
giftpartner[giftpartner==2] <- 0
giftpartner[giftpartner==3] <- NA
#	replace giftpartner=. if regpartner!=1
giftpartner[regpartner!=1 | is.na(regpartner)] <- NA

#replace condomlastsex=condomregpartner if condomlastsex==.&condomregpartner!=.
condomlastsex[is.na(condomlastsex) & !is.na(condomregpartner)] <- condomregpartner[is.na(condomlastsex) & !is.na(condomregpartner)]

#replace agepartner=. if agepartner<13
agepartner[agepartner<13] <- NA
#gen agegap=agepartner-age
agegap = agepartner - age
#gen gapabove5=0 if agepartner!=.
gapabove5 <- numeric(length(agepartner))
gapabove5[is.na(agepartner)] <- NA
#	replace gapabove5=1 if agegap>5&agegap!=.
gapabove5[agegap > 5] <- 1
	
#gen everplayedsex_2=everplayedsex
everplayedsex_2 <- everplayedsex
#	replace everplayedsex_2=1 if regpartner==1
everplayedsex_2[regpartner == 1] <- 1
#gen condomlastsex_2=condomlastsex
condomlastsex_2 <- condomlastsex;
#	replace condomlastsex=. if everplayedsex!=1
condomlastsex[everplayedsex != 1 | is.na(everplayedsex)] <- NA
#	replace condomlastsex_2=. if everplayedsex_2!=1
condomlastsex_2[everplayedsex_2 != 1 | is.na(everplayedsex_2)] <- NA

#gen gapabove5_2=gapabove5
gapabove5_2 <- gapabove5
#	replace gapabove5=. if everplayedsex!=1
gapabove5[everplayedsex != 1 | is.na(everplayedsex)] <- NA
#gen regpartner_2=regpartner
regpartner_2 <- regpartner;
#   !!!ISSUE2!!! regpartner_2 not replaced but regpartner replaced
#	replace regpartner=0 if everplayedsex==0
regpartner[everplayedsex==0] <- 0
#gen everusedcondom_2=everusedcondom
everusedcondom_2 <- everusedcondom
#	replace everusedcondom=. if everplayedsex!=1
everusedcondom[everplayedsex != 1 | is.na(everplayedsex)] <- NA
#	replace everusedcondom_2=. if everplayedsex_2!=1
everusedcondom_2[everplayedsex_2 != 1 | is.na(everplayedsex_2)] <- NA
#gen activeunsafe=0 if everplayedsex!=.
activeunsafe <- numeric(length(everplayedsex))
activeunsafe[is.na(everplayedsex)] <- NA
#	replace activeunsafe=1 if everplayedsex==1&everusedcondom==0
activeunsafe[everplayedsex==1 & everusedcondom==0] <- 1
#gen activeunsafe_2=0 if everplayedsex_2!=.
activeunsafe_2 <- numeric(length(everplayedsex_2))
activeunsafe_2[is.na(everplayedsex_2)] <- NA
#	replace activeunsafe_2=1 if everplayedsex_2==1&everusedcondom_2==0
activeunsafe_2[everplayedsex_2==1 & everusedcondom_2==0] <- 1
#gen giftpartner_2=giftpartner
giftpartner_2 <- giftpartner
#   !!!ISSUE2!!! giftpartner_2 not replaced but giftpartner replaced
giftpartner_2[giftpartner_2==3 | regpartner_2==0 | is.na(regpartner_2)] <- NA
giftpartner_2[giftpartner_2==2] <- 0
#	replace giftpartner=. if everplayedsex!=1
giftpartner[everplayedsex != 1 | is.na(everplayedsex)] <- NA
#replace giftpartner=. if giftpartner==3|regpartner==0|regpartner==.
giftpartner[giftpartner==3 | regpartner==0 | is.na(regpartner)] <- NA
#recode giftpartner 2=0
giftpartner[giftpartner==2] <- 0

#sort secschoolid;

#global indiv_controls="unsampled age agemissing "
#global school_controls="boysonlyschool girlsonlyschool girlsperboy missinggirlsperboy "
#global location_controls="i.realdivision"

#	xi: reg everplayedsex_2 sampleSD HIVtreat  ${indiv_controls} ${school_controls}  ${location_controls}, clust(cluster), if form==1&girl==0
#foreach var of varlist multiplepartner regpartner_2 gapabove5_2 everplayedsex_2 activeunsafe_2 condomlastsex_2 giftpartner_2 {
#multiplepartner regpartner_2 gapabove5_2 giftpartner_2 everplayedsex_2 activeunsafe_2 condomlastsex_2 
x.data <- data.frame(everplayedsex_2,sampleSD,HIVtreat,unsampled,age,agemissing,boysonlyschool,girlsonlyschool,girlsperboy,missinggirlsperboy,factor(realdivision))
lm.testres <- lm(everplayedsex_2 ~ sampleSD + HIVtreat + unsampled + age + agemissing + boysonlyschool + girlsonlyschool + girlsperboy + missinggirlsperboy + factor(realdivision),
	data=x.data, subset=girl==1&form==2&!is.na(realdivision)&!is.na(everplayedsex_2))
robust.se(lm.testres, cluster[girl==1&form==2&!is.na(realdivision)&!is.na(everplayedsex_2)])

#this is factor
realdiv <- factor(realdivision)
xf <- data.frame(everplayedsex_2,sampleSD,HIVtreat,unsampled,age,agemissing,boysonlyschool,girlsonlyschool,girlsperboy,missinggirlsperboy,realdiv)
subset.vect <- girl==1&form==2&!is.na(realdivision)&!is.na(everplayedsex_2)
xf.subset <- subset(xf, subset.vect)

probit.res <- zelig(everplayedsex_2 ~ sampleSD + HIVtreat + unsampled + age + agemissing + boysonlyschool + girlsonlyschool + girlsperboy + missinggirlsperboy + realdiv, data = xf.subset, model = "probit")

probit.coef <- coef(rare); 
probit.vcov <- vcov(rare)

rocplot

y.7 <- data.frame(multiplepartner,regpartner_2,gapabove5_2,giftpartner_2,everplayedsex_2,activeunsafe_2,condomlastsex_2)
print("sex==s & form==2 & SDtreat==0")
for(s in c("F", "M")) {
	print(s)
	for(i in 1:length(y.7[1,])) {
		yi <- y.7[!is.na(y.7[,i]) & sex==s & form==2 & SDtreat==0,i]
		yi.len <- length(yi)
		print(names(y.7)[i])
		print(yi.len)
		print(mean(yi))
	}
	print("------------")
}

print("sex==s & form==2")
for(s in c("F", "M")) {
	print(s)
	for(i in 1:length(y.7[1,])) {
		yi <- y.7[!is.na(y.7[,i]) & sex==s & form==2,i]
		yi.len <- length(yi)
		print(names(y.7)[i])
		print(yi.len)
		print(mean(yi))
	}
	print("------------")
}
#run analylisis
print("------------final results------------")
res.rowname <- c("RRTT information", "RRTT infor robust se", "Observations", "Mean of dependent var")
res.colname <- names(y.7)
reg.res <- matrix(nrow=length(res.rowname),ncol=length(res.colname))
rownames(reg.res) <- res.rowname
colnames(reg.res) <- res.colname
reg.res.list <- list()
reg.res.name <- c()
reg.res.index <- 1

rob.se.list <- list()
rob.se.index <- 1

control.names <- c("unsampled", "agemissing", "boysonlyschool", "girlsonlyschool", "girlsperboy", "missinggirlsperboy")
#Original results
for(rrtt.subvector.index in 1:length(rrtt.subvector[1,])) {
	for(form.value in c(1,2)) {
		for(s in c(1, 0)) {
			if(s == 1)
			{
				reg.res.name[reg.res.index] <- sprintf("%s_form_%d_%s","girl", form.value, colnames(rrtt.subvector)[rrtt.subvector.index])
			}
			else
			{
				reg.res.name[reg.res.index] <- sprintf("%s_form_%d_%s","boy", form.value, colnames(rrtt.subvector)[rrtt.subvector.index])
			}
			print(reg.res.name[reg.res.index])
			for(i in 1:length(res.colname)) {
				yi <- as.vector(y.7[,i])
				rrtt <- rrtt.subvector[,rrtt.subvector.index]
				subset.vector <- girl==s & form==form.value & !is.na(realdivision) & !is.na(yi) & !is.na(rrtt)
				#some control could be all 0. Need to excluded it
				#like for girls "gapabove5_2" boysonlyschool is all 0
				
				XD <- cbind(rrtt, age)
				for(ctrl.name in control.names)
				{
					if(sum(x.data[subset.vector, ctrl.name]) == 0)
					{
						print(c("girl == ",s, names(y.7)[i], ctrl.name, " dropped as all 0"))
					}
					else
					{
						XD <- cbind(XD, x.data[ctrl.name])
					}
				}
				XD <- as.matrix(XD)
				
				lm.res <- lm(yi ~ XD + factor(realdivision), data=list(yi, XD, realdivision), subset=subset.vector)
				rob.se.res <- robust.se(lm.res, cluster[subset.vector])

				#Save results
				rob.se.list[[rob.se.index]] <- rob.se.res
				
				rob.se.index <- rob.se.index + 1
				
				#save result for show
				#RR
				reg.res[1,i] <- rob.se.res[2,1]
				reg.res[2,i] <- rob.se.res[2,2]
				#Observations
				reg.res[3,i] <- sum(as.numeric(subset.vector))
				#mean
				reg.res[4,i] <- mean(yi[subset.vector & rrtt==0 & !is.na(yi)])
			}
			print("------------")
			reg.res.list[[reg.res.index]] <- reg.res
			reg.res.index <- reg.res.index + 1
		}
	}
}

for(reg.res.index in 1:length(reg.res.list)) {
	print(c("-----", reg.res.name[reg.res.index], "-----"))
	print(format(reg.res.list[[reg.res.index]], scientific=FALSE))
}
